{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['',\n",
       " '/Users/wuzhong/gitee/text-classify-course/notebook',\n",
       " '/usr/local/Cellar/python/3.7.1/Frameworks/Python.framework/Versions/3.7/lib/python37.zip',\n",
       " '/usr/local/Cellar/python/3.7.1/Frameworks/Python.framework/Versions/3.7/lib/python3.7',\n",
       " '/usr/local/Cellar/python/3.7.1/Frameworks/Python.framework/Versions/3.7/lib/python3.7/lib-dynload',\n",
       " '/usr/local/lib/python3.7/site-packages',\n",
       " '/usr/local/lib/python3.7/site-packages/IPython/extensions',\n",
       " '/Users/wuzhong/.ipython',\n",
       " '../',\n",
       " '/Users/wuzhong/gitee/text-classify-course']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sys\n",
    "sys.path.append(\"../\")\n",
    "sys.path.append(\"/Users/wuzhong/gitee/text-classify-course\")\n",
    "sys.path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /var/folders/7w/2zfqy3d94l5fc7ckfvzphnbw0000gp/T/jieba.cache\n",
      "Loading model cost 0.957 seconds.\n",
      "Prefix dict has been built succesfully.\n"
     ]
    }
   ],
   "source": [
    "import src.core.PandasUtils as pandasutils\n",
    "import src.core.tf_idf as tfidf\n",
    "import src.core.CacheUtils as cacheutils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = '../dist/train_data_full.pkl'\n",
    "df = cacheutils.load_model(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4773 entries, 0 to 4772\n",
      "Columns: 36113 entries, 0% to ､\n",
      "dtypes: float64(36113)\n",
      "memory usage: 1.3 GB\n"
     ]
    }
   ],
   "source": [
    "df[\"X\"].info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import src.core.train as mytrain\n",
    "import numpy as np\n",
    "\n",
    "y_train = df[\"y\"]\n",
    "X_train = np.array(df[\"X\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start train\n"
     ]
    }
   ],
   "source": [
    "print(\"start train\")\n",
    "model = mytrain.fit_xgb(X_train, y_train)\n",
    "print(\"finish train\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mytrain.metrics(model, X_train, y_train)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
